{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "27bc87b7",
   "metadata": {},
   "source": [
    "# Nebula Graph Store"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bde39e3e",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install llama-index-llms-openai\n",
    "%pip install llama-index-embeddings-openai\n",
    "%pip install llama-index-graph-stores-nebula\n",
    "%pip install llama-index-llms-azure-openai"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "032264ce",
   "metadata": {},
   "outputs": [],
   "source": [
    "# For OpenAI\n",
    "\n",
    "import logging\n",
    "import os\n",
    "import sys\n",
    "\n",
    "# Placeholder credential: replace with a real key before running.\n",
    "os.environ[\"OPENAI_API_KEY\"] = \"INSERT OPENAI KEY\"\n",
    "\n",
    "from llama_index.core import Settings\n",
    "from llama_index.llms.openai import OpenAI\n",
    "\n",
    "# Send INFO-level log records to stdout.\n",
    "logging.basicConfig(level=logging.INFO, stream=sys.stdout)\n",
    "\n",
    "# Define the LLM.\n",
    "# NOTE: the original demo remark about rate limits referred to\n",
    "# text-davinci-002; the model configured here is gpt-3.5-turbo.\n",
    "llm = OpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n",
    "\n",
    "# Store the LLM and the chunk size on the shared Settings object.\n",
    "Settings.chunk_size = 512\n",
    "Settings.llm = llm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6fd36e3b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# For Azure OpenAI\n",
    "import os\n",
    "import json\n",
    "import openai\n",
    "from llama_index.llms.azure_openai import AzureOpenAI\n",
    "from llama_index.embeddings.openai import OpenAIEmbedding\n",
    "from llama_index.core import (\n",
    "    VectorStoreIndex,\n",
    "    SimpleDirectoryReader,\n",
    "    KnowledgeGraphIndex,\n",
    "    Settings,\n",
    ")\n",
    "\n",
    "from llama_index.core import StorageContext\n",
    "from llama_index.graph_stores.nebula import NebulaGraphStore\n",
    "\n",
    "import logging\n",
    "import sys\n",
    "\n",
    "from IPython.display import Markdown, display\n",
    "\n",
    "logging.basicConfig(\n",
    "    stream=sys.stdout, level=logging.INFO\n",
    ")  # logging.DEBUG for more verbose output\n",
    "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))\n",
    "\n",
    "# Azure connection details: replace every <...> placeholder with your own values\n",
    "openai.api_type = \"azure\"\n",
    "openai.api_base = \"https://<foo-bar>.openai.azure.com\"\n",
    "openai.api_version = \"2022-12-01\"\n",
    "os.environ[\"OPENAI_API_KEY\"] = \"<your-openai-key>\"\n",
    "openai.api_key = os.getenv(\"OPENAI_API_KEY\")\n",
    "\n",
    "# Chat/completion model served from your Azure deployment\n",
    "llm = AzureOpenAI(\n",
    "    model=\"<foo-bar-model>\",\n",
    "    engine=\"<foo-bar-deployment>\",\n",
    "    temperature=0,\n",
    "    api_key=openai.api_key,\n",
    "    api_type=openai.api_type,\n",
    "    api_base=openai.api_base,\n",
    "    api_version=openai.api_version,\n",
    ")\n",
    "\n",
    "# You need to deploy your own embedding model as well as your own chat completion model\n",
    "embedding_model = OpenAIEmbedding(\n",
    "    model=\"text-embedding-ada-002\",\n",
    "    deployment_name=\"<foo-bar-deployment>\",\n",
    "    api_key=openai.api_key,\n",
    "    api_base=openai.api_base,\n",
    "    api_type=openai.api_type,\n",
    "    api_version=openai.api_version,\n",
    ")\n",
    "\n",
    "# Settings is imported in this cell so that it also runs on its own,\n",
    "# without the OpenAI setup cell having been executed first.\n",
    "Settings.llm = llm\n",
    "# 512 matches the chunk size used in the OpenAI setup cell\n",
    "Settings.chunk_size = 512\n",
    "Settings.embed_model = embedding_model"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "be3f7baa-1c0a-430b-981b-83ddca9e71f2",
   "metadata": {},
   "source": [
    "## Using Knowledge Graph with NebulaGraphStore"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "75f1d565-04e8-41bc-9165-166dc89b6b47",
   "metadata": {},
   "source": [
    "#### Building the Knowledge Graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d0b2364-4806-4656-81e7-3f6e4b910b5b",
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core import KnowledgeGraphIndex, SimpleDirectoryReader\n",
    "from llama_index.core import StorageContext\n",
    "from llama_index.graph_stores.nebula import NebulaGraphStore\n",
    "\n",
    "\n",
    "from llama_index.llms.openai import OpenAI\n",
    "from IPython.display import Markdown, display"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1c297fd3-3424-41d8-9d0d-25fe6310ab62",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load every file found in the essay data directory\n",
    "essay_reader = SimpleDirectoryReader(\n",
    "    \"../../../../examples/paul_graham_essay/data\"\n",
    ")\n",
    "documents = essay_reader.load_data()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "832b4970",
   "metadata": {},
   "source": [
    "## Prepare for NebulaGraph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7270af8b",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install nebula3-python\n",
    "\n",
    "# Connection settings are exported as environment variables.\n",
    "os.environ[\"NEBULA_USER\"] = \"root\"\n",
    "# Replace with your password; by default it is \"nebula\".\n",
    "os.environ[\"NEBULA_PASSWORD\"] = \"<password>\"\n",
    "# Assumes NebulaGraph 3.5.0 or newer is installed locally.\n",
    "os.environ[\"NEBULA_ADDRESS\"] = \"127.0.0.1:9669\"\n",
    "\n",
    "# This notebook assumes a NebulaGraph cluster is already running.\n",
    "# To create one:\n",
    "#   Option 0: `curl -fsSL nebula-up.siwei.io/install.sh | bash`\n",
    "#   Option 1: NebulaGraph Docker Extension https://hub.docker.com/extensions/weygu/nebulagraph-dd-ext\n",
    "#\n",
    "# It also assumes that a graph space called \"paul_graham_essay\" exists.\n",
    "# If it does not, create it with the following commands from NebulaGraph's console:\n",
    "#   CREATE SPACE paul_graham_essay(vid_type=FIXED_STRING(256), partition_num=1, replica_factor=1);\n",
    "#   :sleep 10;\n",
    "#   USE paul_graham_essay;\n",
    "#   CREATE TAG entity(name string);\n",
    "#   CREATE EDGE relationship(relationship string);\n",
    "#   CREATE TAG INDEX entity_index ON entity(name(256));\n",
    "\n",
    "space_name = \"paul_graham_essay\"\n",
    "\n",
    "# Defaults; these can be omitted when the KG is created from scratch.\n",
    "edge_types = [\"relationship\"]\n",
    "rel_prop_names = [\"relationship\"]\n",
    "tags = [\"entity\"]"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "f0edbc99",
   "metadata": {},
   "source": [
    "## Instantiate GPTNebulaGraph KG Indexes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "370fd08f-56ff-4c24-b0c4-c93116a6d482",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Schema arguments passed to both the graph store and the index builder\n",
    "nebula_schema = dict(\n",
    "    space_name=space_name,\n",
    "    edge_types=edge_types,\n",
    "    rel_prop_names=rel_prop_names,\n",
    "    tags=tags,\n",
    ")\n",
    "\n",
    "graph_store = NebulaGraphStore(**nebula_schema)\n",
    "storage_context = StorageContext.from_defaults(graph_store=graph_store)\n",
    "\n",
    "# NOTE: can take a while!\n",
    "index = KnowledgeGraphIndex.from_documents(\n",
    "    documents,\n",
    "    storage_context=storage_context,\n",
    "    max_triplets_per_chunk=2,\n",
    "    **nebula_schema,\n",
    ")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "c39a0eeb-ef16-4982-8ba8-b37c2c5f4437",
   "metadata": {},
   "source": [
    "#### Querying the Knowledge Graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "670300d8-d0a8-4201-bbcd-4a74b199fcdd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.indices.knowledge_graph.retrievers:> Starting query: Tell me more about Interleaf\n",
      "INFO:llama_index.indices.knowledge_graph.retrievers:> Query keywords: ['Interleaf', 'history', 'software', 'company']\n",
      "ERROR:llama_index.indices.knowledge_graph.retrievers:Index was not constructed with embeddings, skipping embedding usage...\n",
      "INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 6aa6a716-7390-4783-955b-8169fab25bb1: worth trying.\n",
      "\n",
      "Our teacher, professor Ulivi, was a nice guy. He could see I w...\n",
      "INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 79f2a1b4-80bb-416f-a259-ebfc3136b2fe: on a map of New York City: if you zoom in on the Upper East Side, there's a t...\n",
      "INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 1e707b8c-b62a-4c1a-a908-c79e77b9692b: buyers pay a lot for such work. [6]\n",
      "\n",
      "There were plenty of earnest students to...\n",
      "INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 31c2f53c-928a-4ed0-88fc-df92dba47c33: for example, that the reason the color changes suddenly at a certain point is...\n",
      "INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: f51d8a1c-06bc-45aa-bed1-1714ae4e5fb9: the software is an online store builder and you're hosting the stores, if you...\n",
      "INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 008052a0-a64b-4e3c-a2af-4963896bfc19: Engineering that seemed to be at least as big as the group that actually wrot...\n",
      "INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: b1f5a610-9e0a-4e3e-ba96-514ae7d63a84: closures stored in a hash table on the server.\n",
      "\n",
      "It helped to have studied art...\n",
      "INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: f7cc82a7-76e0-4a06-9f50-d681404c5bce: of Robert's apartment in Cambridge. His roommate was away for big chunks of t...\n",
      "INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: db626325-035a-4f67-87c0-1e770b80f4a6: want to be online, and still don't, not the fancy ones. That's not how they s...\n",
      "INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 73e76f4b-0ebe-4af6-9c2d-6affae81373b: But in the long term the growth rate takes care of the absolute number. If we...\n",
      "INFO:llama_index.indices.knowledge_graph.retrievers:> Extracted relationships: The following are knowledge triplets in max depth 2 in the form of `subject [predicate, object, predicate_next_hop, object_next_hop ...]`\n",
      "software ['is', 'web app', 'common', 'now']\n",
      "software ['is', 'web app', \"wasn't clear\", 'it was possible']\n",
      "software ['generate', 'web sites']\n",
      "software ['worked', 'via web']\n",
      "software ['is', 'web app']\n",
      "software ['has', 'three main parts']\n",
      "software ['is', 'online store builder']\n",
      "Lisp ['has dialects', 'because']\n",
      "Lisp ['rare', 'C++']\n",
      "Lisp ['is', 'language']\n",
      "Lisp ['has dialects', '']\n",
      "Lisp ['has dialects', 'because one of the distinctive features of the language is that it has dialects']\n",
      "Lisp ['was regarded as', 'language of AI']\n",
      "Lisp ['defined by', 'writing an interpreter']\n",
      "Lisp ['was meant to be', 'formal model of computation']\n",
      "Interleaf ['added', 'scripting language']\n",
      "Interleaf ['made software for', 'creating documents']\n",
      "Interleaf ['was how I learned that', 'low end software tends to eat high end software']\n",
      "Interleaf ['was', 'on the way down']\n",
      "Interleaf ['on the way down', '1993']\n",
      "RISD ['was', 'art school']\n",
      "RISD ['counted me as', 'transfer sophomore']\n",
      "RISD ['was', 'supposed to be the best art school in the country']\n",
      "RISD ['was', 'the best art school in the country']\n",
      "Robert ['wrote', 'shopping cart', 'written by', 'robert']\n",
      "Robert ['wrote', 'shopping cart', 'written by', 'Robert']\n",
      "Robert ['wrote', 'shopping cart']\n",
      "Robert Morris ['offered', 'unsolicited advice']\n",
      "Yorkville ['is', 'tiny corner']\n",
      "Yorkville [\"wasn't\", 'rich']\n",
      "online ['is not', 'publishing online']\n",
      "online ['is not', 'publishing online', 'means', 'you treat the online version as the primary version']\n",
      "web app ['common', 'now']\n",
      "web app [\"wasn't clear\", 'it was possible']\n",
      "editor ['written by', 'author']\n",
      "shopping cart ['written by', 'Robert', 'wrote', 'shopping cart']\n",
      "shopping cart ['written by', 'Robert']\n",
      "shopping cart ['written by', 'robert', 'wrote', 'shopping cart']\n",
      "shopping cart ['written by', 'robert']\n",
      "Robert ['wrote', 'shopping cart', 'written by', 'Robert']\n",
      "Robert ['wrote', 'shopping cart', 'written by', 'robert']\n",
      "Robert ['wrote', 'shopping cart']\n",
      "Lisp ['defined by', 'writing an interpreter']\n",
      "Lisp ['has dialects', 'because']\n",
      "Lisp ['was meant to be', 'formal model of computation']\n",
      "Lisp ['rare', 'C++']\n",
      "Lisp ['is', 'language']\n",
      "Lisp ['has dialects', '']\n",
      "Lisp ['has dialects', 'because one of the distinctive features of the language is that it has dialects']\n",
      "Lisp ['was regarded as', 'language of AI']\n",
      "Y Combinator ['would have said', 'Stop being so stressed out']\n",
      "Y Combinator ['helps', 'founders']\n",
      "Y Combinator ['is', 'investment firm']\n",
      "company ['reaches breakeven', 'when yahoo buys it']\n",
      "company ['gave', 'business advice']\n",
      "company ['reaches breakeven', 'when Yahoo buys it']\n",
      "software ['worked', 'via web']\n",
      "software ['is', 'web app', \"wasn't clear\", 'it was possible']\n",
      "software ['generate', 'web sites']\n",
      "software ['has', 'three main parts']\n",
      "software ['is', 'online store builder']\n",
      "software ['is', 'web app']\n",
      "software ['is', 'web app', 'common', 'now']\n",
      "Y Combinator ['would have said', 'Stop being so stressed out']\n",
      "Y Combinator ['is', 'investment firm']\n",
      "Y Combinator ['helps', 'founders']\n",
      "company ['gave', 'business advice']\n",
      "company ['reaches breakeven', 'when Yahoo buys it']\n",
      "company ['reaches breakeven', 'when yahoo buys it']\n",
      "INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 5916 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens\n"
     ]
    }
   ],
   "source": [
    "# Build a query engine with default options and ask the first question\n",
    "query_engine = index.as_query_engine()\n",
    "\n",
    "interleaf_question = \"Tell me more about Interleaf\"\n",
    "response = query_engine.query(interleaf_question)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eecf2d57-3efa-4b0d-941a-95438d42893c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "<b>\n",
       "\n",
       "Interleaf was a software company that made software for creating documents. Their software was inspired by Emacs, and included a scripting language that was a dialect of Lisp. The company was started in the 1990s, and eventually went out of business.</b>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(Markdown(f\"<b>{response}</b>\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bd14686d-1c53-4637-9340-3745f2121ae2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.indices.knowledge_graph.retrievers:> Starting query: Tell me more about what the author worked on at Interleaf\n",
      "INFO:llama_index.indices.knowledge_graph.retrievers:> Query keywords: ['Interleaf', 'author', 'work']\n",
      "ERROR:llama_index.indices.knowledge_graph.retrievers:Index was not constructed with embeddings, skipping embedding usage...\n",
      "INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 6aa6a716-7390-4783-955b-8169fab25bb1: worth trying.\n",
      "\n",
      "Our teacher, professor Ulivi, was a nice guy. He could see I w...\n",
      "INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 79f2a1b4-80bb-416f-a259-ebfc3136b2fe: on a map of New York City: if you zoom in on the Upper East Side, there's a t...\n",
      "INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 1e707b8c-b62a-4c1a-a908-c79e77b9692b: buyers pay a lot for such work. [6]\n",
      "\n",
      "There were plenty of earnest students to...\n",
      "INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 31c2f53c-928a-4ed0-88fc-df92dba47c33: for example, that the reason the color changes suddenly at a certain point is...\n",
      "INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: b1f5a610-9e0a-4e3e-ba96-514ae7d63a84: closures stored in a hash table on the server.\n",
      "\n",
      "It helped to have studied art...\n",
      "INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: 6cda9196-dcdb-4441-8f27-ff3f18779c4c: so easy. And that implies that HN was a mistake. Surely the biggest source of...\n",
      "INFO:llama_index.indices.knowledge_graph.retrievers:> Querying with idx: a467cf4c-19cf-490f-92ad-ce03c8d91231: I've noticed in my life is how well it has worked, for me at least, to work o...\n",
      "INFO:llama_index.indices.knowledge_graph.retrievers:> Extracted relationships: The following are knowledge triplets in max depth 2 in the form of `subject [predicate, object, predicate_next_hop, object_next_hop ...]`\n",
      "software ['is', 'web app', 'common', 'now']\n",
      "software ['is', 'web app', \"wasn't clear\", 'it was possible']\n",
      "software ['generate', 'web sites']\n",
      "software ['worked', 'via web']\n",
      "software ['is', 'web app']\n",
      "software ['has', 'three main parts']\n",
      "software ['is', 'online store builder']\n",
      "Lisp ['has dialects', 'because']\n",
      "Lisp ['rare', 'C++']\n",
      "Lisp ['is', 'language']\n",
      "Lisp ['has dialects', '']\n",
      "Lisp ['has dialects', 'because one of the distinctive features of the language is that it has dialects']\n",
      "Lisp ['was regarded as', 'language of AI']\n",
      "Lisp ['defined by', 'writing an interpreter']\n",
      "Lisp ['was meant to be', 'formal model of computation']\n",
      "Interleaf ['added', 'scripting language']\n",
      "Interleaf ['made software for', 'creating documents']\n",
      "Interleaf ['was how I learned that', 'low end software tends to eat high end software']\n",
      "Interleaf ['was', 'on the way down']\n",
      "Interleaf ['on the way down', '1993']\n",
      "RISD ['was', 'art school']\n",
      "RISD ['counted me as', 'transfer sophomore']\n",
      "RISD ['was', 'supposed to be the best art school in the country']\n",
      "RISD ['was', 'the best art school in the country']\n",
      "Robert ['wrote', 'shopping cart', 'written by', 'robert']\n",
      "Robert ['wrote', 'shopping cart', 'written by', 'Robert']\n",
      "Robert ['wrote', 'shopping cart']\n",
      "Robert Morris ['offered', 'unsolicited advice']\n",
      "Yorkville ['is', 'tiny corner']\n",
      "Yorkville [\"wasn't\", 'rich']\n",
      "shopping cart ['written by', 'Robert', 'wrote', 'shopping cart']\n",
      "shopping cart ['written by', 'robert', 'wrote', 'shopping cart']\n",
      "shopping cart ['written by', 'Robert']\n",
      "shopping cart ['written by', 'robert']\n",
      "online ['is not', 'publishing online', 'means', 'you treat the online version as the primary version']\n",
      "online ['is not', 'publishing online']\n",
      "software ['has', 'three main parts']\n",
      "software ['generate', 'web sites']\n",
      "software ['is', 'web app', 'common', 'now']\n",
      "software ['is', 'online store builder']\n",
      "software ['is', 'web app']\n",
      "software ['is', 'web app', \"wasn't clear\", 'it was possible']\n",
      "software ['worked', 'via web']\n",
      "editor ['written by', 'author']\n",
      "YC ['is', 'work', 'is unprestigious', '']\n",
      "YC ['grew', 'more exciting']\n",
      "YC ['founded in', 'Berkeley']\n",
      "YC ['founded in', '2005']\n",
      "YC ['founded in', '1982']\n",
      "YC ['is', 'full-time job']\n",
      "YC ['is', 'engaging work']\n",
      "YC ['is', 'batch model']\n",
      "YC ['is', 'Summer Founders Program']\n",
      "YC ['was', 'coffee shop']\n",
      "YC ['invests in', 'startups']\n",
      "YC ['is', 'fund']\n",
      "YC ['started to notice', 'other advantages']\n",
      "YC ['grew', 'quickly']\n",
      "YC ['controlled by', 'founders']\n",
      "YC ['is', 'work']\n",
      "YC ['became', 'full-time job']\n",
      "YC ['is self-funded', 'by Heroku']\n",
      "YC ['is', 'hard work']\n",
      "YC ['funds', 'startups']\n",
      "YC ['controlled by', 'LLC']\n",
      "Robert ['wrote', 'shopping cart']\n",
      "Robert ['wrote', 'shopping cart', 'written by', 'Robert']\n",
      "Robert ['wrote', 'shopping cart', 'written by', 'robert']\n",
      "Lisp ['was meant to be', 'formal model of computation']\n",
      "Lisp ['defined by', 'writing an interpreter']\n",
      "Lisp ['was regarded as', 'language of AI']\n",
      "Lisp ['has dialects', 'because']\n",
      "Lisp ['has dialects', '']\n",
      "Lisp ['has dialects', 'because one of the distinctive features of the language is that it has dialects']\n",
      "Lisp ['rare', 'C++']\n",
      "Lisp ['is', 'language']\n",
      "party ['was', 'clever idea']\n",
      "Y Combinator ['would have said', 'Stop being so stressed out']\n",
      "Y Combinator ['is', 'investment firm']\n",
      "Y Combinator ['helps', 'founders']\n",
      "Robert Morris ['offered', 'unsolicited advice']\n",
      "work ['is unprestigious', '']\n",
      "Jessica Livingston ['is', 'woman']\n",
      "Jessica Livingston ['decided', 'compile book']\n",
      "HN ['edge case', 'bizarre']\n",
      "HN ['edge case', 'when you both write essays and run a forum']\n",
      "INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 4651 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens\n"
     ]
    }
   ],
   "source": [
    "# Ask a follow-up question with the current query engine\n",
    "author_question = \"Tell me more about what the author worked on at Interleaf\"\n",
    "response = query_engine.query(author_question)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b4c87d14-d2d8-4d80-89f6-1e5972973528",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "<b>\n",
       "\n",
       "The author worked on a software that allowed users to create documents, which was inspired by Emacs. The software had a scripting language that was a dialect of Lisp, and the author was responsible for writing things in this language.\n",
       "\n",
       "The author also worked on a software that allowed users to generate web sites. This software was a web app and was written in a dialect of Lisp. The author was also responsible for writing things in this language.</b>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(Markdown(f\"<b>{response}</b>\"))"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "c13e55f4",
   "metadata": {},
   "source": [
    "## Visualizing the Graph RAG\n",
    "\n",
    "If we visualize the graph-based RAG, starting from the terms `['Interleaf', 'history', 'Software', 'Company']`, we can see what the connected context looks like. It is a different form of information/knowledge:\n",
    "\n",
    "- Refined and Concise Form\n",
    "- Fine-grained Segmentation\n",
    "- Interconnected, structured nature"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7ba50313",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install ipython-ngql networkx pyvis\n",
    "%load_ext ngql"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ab1c77c5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Connection Pool Created\n",
      "INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)\n",
      "Get connection to ('127.0.0.1', 9669)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Apple_Vision_Pro</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>basketballplayer</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>demo_ai_ops</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>demo_basketballplayer</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>demo_data_lineage</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>demo_fifa_2022</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>demo_fraud_detection</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>demo_identity_resolution</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>demo_movie_recommendation</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>demo_sns</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>guardians</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>k8s</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>langchain</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>llamaindex</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>paul_graham_essay</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>squid_game</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>test</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                         Name\n",
       "0            Apple_Vision_Pro\n",
       "1            basketballplayer\n",
       "2                 demo_ai_ops\n",
       "3       demo_basketballplayer\n",
       "4           demo_data_lineage\n",
       "5              demo_fifa_2022\n",
       "6        demo_fraud_detection\n",
       "7    demo_identity_resolution\n",
       "8   demo_movie_recommendation\n",
       "9                    demo_sns\n",
       "10                  guardians\n",
       "11                        k8s\n",
       "12                  langchain\n",
       "13                 llamaindex\n",
       "14          paul_graham_essay\n",
       "15                 squid_game\n",
       "16                       test"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%ngql --address 127.0.0.1 --port 9669 --user root --password <password>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "797c6dec",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)\n",
      "Get connection to ('127.0.0.1', 9669)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>p</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>(\"Interleaf\" :entity{name: \"Interleaf\"})-[:rel...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>(\"Interleaf\" :entity{name: \"Interleaf\"})-[:rel...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>(\"Interleaf\" :entity{name: \"Interleaf\"})-[:rel...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>(\"Interleaf\" :entity{name: \"Interleaf\"})-[:rel...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                   p\n",
       "0  (\"Interleaf\" :entity{name: \"Interleaf\"})-[:rel...\n",
       "1  (\"Interleaf\" :entity{name: \"Interleaf\"})-[:rel...\n",
       "2  (\"Interleaf\" :entity{name: \"Interleaf\"})-[:rel...\n",
       "3  (\"Interleaf\" :entity{name: \"Interleaf\"})-[:rel..."
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%ngql\n",
    "USE paul_graham_essay;\n",
    "MATCH p=(n)-[*1..2]-()\n",
    "  WHERE id(n) IN ['Interleaf', 'history', 'Software', 'Company'] \n",
    "RETURN p LIMIT 100;"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ba672c76",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nebulagraph_draw.html\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "        <iframe\n",
       "            width=\"100%\"\n",
       "            height=\"500px\"\n",
       "            src=\"nebulagraph_draw.html\"\n",
       "            frameborder=\"0\"\n",
       "            allowfullscreen\n",
       "            \n",
       "        ></iframe>\n",
       "        "
      ],
      "text/plain": [
       "<IPython.lib.display.IFrame at 0x148f75a50>"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%ng_draw"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "ecc7342a",
   "metadata": {},
   "source": [
    "#### Query with embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b20f9da1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: can take a while!\n",
    "\n",
    "# Rebuild the index, this time with include_embeddings=True\n",
    "nebula_schema = dict(\n",
    "    space_name=space_name,\n",
    "    edge_types=edge_types,\n",
    "    rel_prop_names=rel_prop_names,\n",
    "    tags=tags,\n",
    ")\n",
    "index = KnowledgeGraphIndex.from_documents(\n",
    "    documents,\n",
    "    storage_context=storage_context,\n",
    "    max_triplets_per_chunk=2,\n",
    "    include_embeddings=True,\n",
    "    **nebula_schema,\n",
    ")\n",
    "\n",
    "# Options for the hybrid-mode query engine\n",
    "hybrid_query_options = dict(\n",
    "    include_text=True,\n",
    "    response_mode=\"tree_summarize\",\n",
    "    embedding_mode=\"hybrid\",\n",
    "    similarity_top_k=5,\n",
    ")\n",
    "query_engine = index.as_query_engine(**hybrid_query_options)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "01b74b2a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# query using top 5 triplets (similarity_top_k=5) plus keywords (duplicate triplets are removed)\n",
    "response = query_engine.query(\n",
    "    \"Tell me more about what the author worked on at Interleaf\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "02084f6d",
   "metadata": {},
   "outputs": [],
   "source": [
    "display(Markdown(f\"<b>{response}</b>\"))"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "a0e29042",
   "metadata": {},
   "source": [
    "#### Query with more global (cross-node) context"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ed184390",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same hybrid settings as the previous query engine, with\n",
    "# explore_global_knowledge=True added for this cross-node question.\n",
    "query_engine = index.as_query_engine(\n",
    "    include_text=True,\n",
    "    response_mode=\"tree_summarize\",\n",
    "    embedding_mode=\"hybrid\",\n",
    "    similarity_top_k=5,\n",
    "    explore_global_knowledge=True,\n",
    ")\n",
    "\n",
    "response = query_engine.query(\"Tell me more about the author and Lisp\")\n",
    "\n",
    "# Render the answer inline, matching how the Interleaf response is displayed.\n",
    "display(Markdown(f\"<b>{response}</b>\"))"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "cd582500-584c-409a-9963-921738f1beb8",
   "metadata": {},
   "source": [
    "#### Visualizing the Graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b9fe3d26-4f9a-4651-b83f-0018672a34e4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "        <iframe\n",
       "            width=\"100%\"\n",
       "            height=\"600px\"\n",
       "            src=\"example.html\"\n",
       "            frameborder=\"0\"\n",
       "            allowfullscreen\n",
       "            \n",
       "        ></iframe>\n",
       "        "
      ],
      "text/plain": [
       "<IPython.lib.display.IFrame at 0x127e30c70>"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "## create graph\n",
    "from pyvis.network import Network\n",
    "\n",
    "g = index.get_networkx_graph()\n",
    "net = Network(notebook=True, cdn_resources=\"in_line\", directed=True)\n",
    "net.from_nx(g)\n",
    "net.show(\"example.html\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "40b97044-d212-4151-bd72-6ea2cff35a29",
   "metadata": {},
   "source": [
    "#### [Optional] Try building the graph and manually adding triplets!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f9de2ddb-4e82-438b-ba3a-b7680efed944",
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core.node_parser import SentenceSplitter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "137176d9-1bc2-4203-8379-7b285cd41546",
   "metadata": {},
   "outputs": [],
   "source": [
    "node_parser = SentenceSplitter()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dc609c08-6fce-444c-84cd-a305fcad6bcd",
   "metadata": {},
   "outputs": [],
   "source": [
    "nodes = node_parser.get_nodes_from_documents(documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "21c3ad61-6f2a-4176-96ba-6e9f52d6243d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# not yet implemented\n",
    "\n",
    "# initialize an empty index for now\n",
    "index = KnowledgeGraphIndex.from_documents([], storage_context=storage_context)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "41e03f7e-bb98-4fe0-9fc0-369be2864a00",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Manually attach (subject, relationship, object) triplets to parsed nodes.\n",
    "# Keys are positions in `nodes`; values are the triplets upserted for that node.\n",
    "triplets_by_node = {\n",
    "    0: [\n",
    "        (\"author\", \"worked on\", \"writing\"),\n",
    "        (\"author\", \"worked on\", \"programming\"),\n",
    "    ],\n",
    "    1: [\n",
    "        (\"Interleaf\", \"made software for\", \"creating documents\"),\n",
    "        (\"Interleaf\", \"added\", \"scripting language\"),\n",
    "        (\"software\", \"generate\", \"web sites\"),\n",
    "    ],\n",
    "}\n",
    "\n",
    "# dicts keep insertion order, so node 0's triplets are upserted before node 1's\n",
    "for node_position, triplets in triplets_by_node.items():\n",
    "    for triplet in triplets:\n",
    "        index.upsert_triplet_and_node(triplet, nodes[node_position])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "48b1a666-2f84-4524-851a-66efd2beb611",
   "metadata": {},
   "outputs": [],
   "source": [
    "query_engine = index.as_query_engine(\n",
    "    include_text=False, response_mode=\"tree_summarize\"\n",
    ")\n",
    "\n",
    "response = query_engine.query(\"Tell me more about Interleaf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fb4b99d7-452f-4594-94e9-da10a3a23fb8",
   "metadata": {},
   "outputs": [],
   "source": [
    "str(response)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
